In [1]:
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
In [2]:
# implementation of pairwise distance function
# make datasets of coordinates
I = -np.random.randint(10,size=(4,2))
J = -np.random.randint(10,size=(3,2))
I
Out[2]:
In [3]:
J
Out[3]:
We want to replicate what the scipy cdist function does:
http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.spatial.distance.cdist.html
In [4]:
from scipy.spatial import distance
distance.cdist(I, J)
Out[4]:
In [23]:
# ignoring other metrics, we want to implement euclidean distance
# sqrt((x0-x1)**2 + (y0-y1)**2)
In [12]:
np.mgrid[:I.shape[0],:J.shape[0]]
Out[12]:
In [20]:
# create a rectangular grid that is Ni by Nj
i, j = np.mgrid[:I.shape[0],:J.shape[0]]
i
Out[20]:
In [25]:
j
Out[25]:
In [24]:
# now we want the pairs of points I[i] and J[j]
J[j]
Out[24]:
In [32]:
J[j]
Out[32]:
In [25]:
# to subtract x0-x1 and y0-y1 simultaneously, we need this expression
I[i] - J[j]
Out[25]:
In [28]:
# then we square it
s = (I[i] - J[j])**2
s
Out[28]:
In [31]:
# now take the sum of squares along the 3rd axis (we're summing (x0-x1)**2 and (y0-y1)**2)
sq = np.sum(s,axis=2)
sq
Out[31]:
In [32]:
# now take the square root
np.sqrt(sq)
Out[32]:
In [33]:
# here it is as a function
def cdist(I, J):
i, j = np.mgrid[:I.shape[0],:J.shape[0]]
return np.sqrt(np.sum((I[i] - J[j])**2,axis=2))
cdist(I, J)
Out[33]: